import pandas as pd
from collections import Counter

#===============================================================================#
#
# Table 2: Comparing the Internet Sources of Humans and Social Bots
#
#===============================================================================#

# Input: tab-separated file with a header row. Two columns are read here:
#   doms - whitespace-separated domain names for the row (may be missing)
#   bot  - group label; rows labelled 'bot' and 'human' are tabulated
df = pd.read_csv('data/url_data.csv', sep='\t', header=0)

# Domains left out of the ranked lists. They are still included in the
# totals that serve as the percentage denominators.
spam_filter = ['myshopify.com', 'sportspaandu.com']

_TOP_N = 20            # rows reported per group
_CANDIDATE_POOL = 50   # most-common domains inspected before spam filtering


def _domain_counts(frame, label):
    """Count domain occurrences over the rows whose ``bot`` column is *label*.

    Rows with a missing ``doms`` value are skipped. Each remaining ``doms``
    value is split on whitespace and every resulting token is counted.

    Returns:
        Counter mapping domain -> number of occurrences.
    """
    rows = frame[pd.notnull(frame.doms) & (frame.bot == label)]
    return Counter(dom for cell in rows.doms.tolist() for dom in cell.split())


def _top_domains(counts, excluded, limit=_TOP_N, pool=_CANDIDATE_POOL):
    """Return up to *limit* ``(domain, count)`` pairs, most frequent first.

    Only the *pool* most common domains are considered; those listed in
    *excluded* are dropped before the result is truncated to *limit*.
    """
    kept = [(dom, n) for dom, n in counts.most_common(pool)
            if dom not in excluded]
    return kept[:limit]


def _padded(values, length):
    """Return *values* extended with empty strings up to *length* items."""
    return values + [''] * (length - len(values))


def _group_columns(prefix, ranked, total, length):
    """Build the Domain / Count / Pct columns for one group.

    Args:
        prefix: column label prefix, e.g. ``'Humans'``.
        ranked: ``(domain, count)`` pairs in display order.
        total: denominator for the percentage column (all domain
            occurrences of the group). Never used when *ranked* is empty,
            so an empty group cannot trigger a division by zero.
        length: number of rows every column must have.
    """
    return {
        prefix + ': Domain': _padded([dom for dom, _ in ranked], length),
        prefix + ': Count': _padded([n for _, n in ranked], length),
        prefix + ': Pct': _padded(
            ['%0.1f' % (n / total * 100) for _, n in ranked], length),
    }


cbot = _domain_counts(df, 'bot')
chum = _domain_counts(df, 'human')
total_bot = sum(cbot.values())
total_human = sum(chum.values())

mc_hum = _top_domains(chum, spam_filter)
mc_bot = _top_domains(cbot, spam_filter)

# The two rankings differ in length when a group has fewer than _TOP_N
# distinct non-spam domains. DataFrame requires equal-length columns, so
# the shorter group is padded with blanks instead of raising ValueError.
_n_rows = max(len(mc_hum), len(mc_bot))

_columns = _group_columns('Humans', mc_hum, total_human, _n_rows)
_columns.update(_group_columns('Bots', mc_bot, total_bot, _n_rows))
results = pd.DataFrame(_columns)

# Write the side-by-side table as plain text, without the index column.
with open('tables/table2.txt', 'w') as fout:
    print(results.to_string(index=False), file=fout)

